{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "eOcfPqAVFu0n"
      },
      "source": [
        "# RAG Pipeline With Keras NLP, MongoDB and OpenAI\n",
        "\n",
        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/GenAI-Showcase/blob/main/notebooks/rag/rag_pipeline_kerasnlp_mongodb_gemma2.ipynb)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "2BxmHbX7HUEw"
      },
      "source": [
        "## Set Up Libraries"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3QualpTXU4A2",
        "outputId": "053f13a2-b1a4-4033-89c3-013013591337"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\u001b[?25l     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/327.5 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K     \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━\u001b[0m \u001b[32m204.8/327.5 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m327.5/327.5 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h"
          ]
        }
      ],
      "source": [
        "# Install all deps\n",
        "!pip --quiet install keras\n",
        "!pip --quiet install keras-nlp\n",
        "!pip --quiet install --upgrade --quiet datasets pandas pymongo\n",
        "!pip --quiet install openai"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "mZdrwUHKHYOV"
      },
      "source": [
        "## Set Up Environment Variables"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 53,
      "metadata": {
        "id": "L-rmqgDgTmuk"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "\n",
        "os.environ[\"KERAS_BACKEND\"] = \"jax\"  # Or \"tensorflow\" or \"torch\".\n",
        "os.environ[\"OPENAI_API_KEY\"] = \"\""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "lPzv3-oiHeLH"
      },
      "source": [
        "## Data Loading"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 23,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 0
        },
        "id": "1tPaGTkA_UaN",
        "outputId": "b4b2f0b7-f68d-44df-990b-a65e3b3577d3"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Repo card metadata block was not found. Setting CardData to empty.\n",
            "WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.\n"
          ]
        },
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "summary": "{\n  \"name\": \"dataset_df\",\n  \"rows\": 4000,\n  \"fields\": [\n    {\n      \"column\": \"id\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.11548448669265783,\n        \"min\": 704.0001,\n        \"max\": 704.4,\n        \"num_unique_values\": 4000,\n        \"samples\": [\n          704.0556,\n          704.3492,\n          704.0528\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"submitter\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 3488,\n        \"samples\": [\n          \"Subhrajyoti Biswas\",\n          \"Rui Dilao\",\n          \"Pasha Zusmanovich\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"authors\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 3817,\n        \"samples\": [\n          \"Le Song, Alex Smola, Arthur Gretton, Karsten Borgwardt, Justin Bedo\",\n          \"Chris Austin\",\n          \"M.V. Budyansky, M.Yu. Uleysky, and S.V. Prants\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"title\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 3999,\n        \"samples\": [\n          \"GLSM's for partial flag manifolds\",\n          \"A Subaru/Suprime-Cam Survey of M31's spheroid along the South-East minor\\n  axis\",\n          \"Fractional Generalization of Kac Integral\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"comments\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 2894,\n        \"samples\": [\n          \"Accepted for publication in New Journal of Physics\",\n          \"29 pages, LaTeX, 4 figures, 7 tables\",\n          \"10 pages, 19 figures. Accepted for publication in MNRAS\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"journal-ref\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 2196,\n        \"samples\": [\n          \"Univ. Beograd. Publ. Elektrotehn. Fak., Ser. Mat. 7 (1996),\\n  105--109\",\n          \"Phys.Rev.D76:056001,2007\",\n          \"Phys. Rev. A 76, 051601(R) (2007)\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"doi\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 2557,\n        \"samples\": [\n          \"10.1086/518879\",\n          \"10.1016/j.nuclphysbps.2006.12.094\",\n          \"10.1103/PhysRevD.76.014010\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"report-no\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 334,\n        \"samples\": [\n          \"Accepted (12/03/2007) for publication in A&A main journal\",\n          \"LMU-ASC 28/07\",\n          \"LIGO-P060016-C-Z\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"categories\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 819,\n        \"samples\": [\n          \"cond-mat.stat-mech\",\n          \"math.LO\",\n          \"cond-mat.other cond-mat.supr-con\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"license\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"http://arxiv.org/licenses/nonexclusive-distrib/1.0/\",\n          \"http://creativecommons.org/licenses/by-nc-nd/4.0/\",\n          \"http://creativecommons.org/licenses/publicdomain/\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"abstract\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 3998,\n        \"samples\": [\n          \"  In the present letter we suggest a new theoretical model for a quantitative\\ndescription of the magnetoreception mechanism in birds. The considered\\nmechanism involves two types of iron minerals (magnetite and maghemite) which\\nwere found in subcellular compartments within sensory dendrites of the upper\\nbeak of several bird species. The analysis of forces acting between the iron\\nparticles shows that the orientation of the external geomagnetic field can\\nsignificantly change the probability of the mechanosensitive ion channels\\nopening and closing. The performed theoretical analysis shows that the\\nsuggested magnetoreceptor system might be a sensitive biological magnetometer\\nproviding an essential part of the magnetic map for navigation.\\n\",\n          \"  We calculate the CP-violating ratio epsilon'/epsilon in the Littlest Higgs\\nmodel with T-parity (LHT) and investigate its correlations with the branching\\nratios for K_L -> pi^0 nu {bar nu}, K_L -> pi^0 l^+ l^- and K^+ -> pi^+ nu {bar\\nnu}. The resulting correlations are rather strong in the case of K_L decays,\\nbut less pronounced in the case of K^+ -> pi^+ nu {bar nu}. Unfortunately, they\\nare subject to large hadronic uncertainties present in epsilon'/epsilon, whose\\ntheoretical prediction in the Standard Model (SM) is reviewed and updated here.\\nWith the matrix elements of Q_6 (gluon penguin) and Q_8 (electroweak penguin)\\nevaluated in the large-N limit and m_s^MS(2 GeV) = 100 MeV from lattice QCD,\\n(epsilon'/epsilon)_SM turns out to be close to the data so that significant\\ndepartures of Br(K_L -> pi^0 nu {bar nu}) and Br(K_L -> pi^0 l^+ l^-) from the\\nSM expectations are unlikely, while Br(K^+ -> pi^+ nu {bar nu}) can be enhanced\\neven by a factor 5. On the other hand, modest departures of the relevant\\nhadronic matrix elements from their large-N values allow for a consistent\\ndescription of epsilon'/epsilon within the LHT model accompanied by large\\nenhancements of Br(K_L -> pi^0 nu {bar nu}) and Br(K_L -> pi^0 l^+ l^-), but\\nonly modest enhancements of Br(K^+ -> pi^+ nu {bar nu}).\\n\",\n          \"  We examine the orbits of satellite galaxies identified in a suite of\\nN-body/gasdynamical simulations of the formation of $L_*$ galaxies in a LCDM\\nuniverse. Most satellites follow conventional orbits; after turning around,\\nthey accrete into their host halo and settle on orbits whose apocentric radii\\nare steadily eroded by dynamical friction. However, a number of outliers are\\nalso present, we find that ~1/3 of satellites identified at $z=0$ are on\\nunorthodox orbits, with apocenters that exceed their turnaround radii. This\\npopulation of satellites on extreme orbits consists typically of the faint\\nmember of a satellite pair that has been ejected onto a highly-energetic orbit\\nduring its first approach to the primary. Since the concurrent accretion of\\nmultiple satellite systems is a defining feature of hierarchical models of\\ngalaxy formation, we speculate that this three-body ejection mechanism may be\\nthe origin of (i) some of the newly discovered high-speed satellites around M31\\n(such as Andromeda XIV); (ii) some of the distant fast-receding Local Group\\nmembers, such as Leo I; and (iii) the oddly isolated dwarf spheroidals Cetus\\nand Tucana in the outskirts of the Local Group. Our results suggest that care\\nmust be exercised when using the orbits of the most weakly bound satellites to\\nplace constraints on the total mass of the Local Group.\\n\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"versions\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"update_date\",\n      \"properties\": {\n        \"dtype\": \"date\",\n        \"min\": \"2007-05-23 00:00:00\",\n        \"max\": \"2024-02-13 00:00:00\",\n        \"num_unique_values\": 743,\n        \"samples\": [\n          \"2009-01-05 00:00:00\",\n          \"2016-01-11 00:00:00\",\n          \"2010-09-27 00:00:00\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"authors_parsed\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"embedding\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
              "type": "dataframe",
              "variable_name": "dataset_df"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-251c5868-6d7c-4b93-a071-6cbb51375179\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>id</th>\n",
              "      <th>submitter</th>\n",
              "      <th>authors</th>\n",
              "      <th>title</th>\n",
              "      <th>comments</th>\n",
              "      <th>journal-ref</th>\n",
              "      <th>doi</th>\n",
              "      <th>report-no</th>\n",
              "      <th>categories</th>\n",
              "      <th>license</th>\n",
              "      <th>abstract</th>\n",
              "      <th>versions</th>\n",
              "      <th>update_date</th>\n",
              "      <th>authors_parsed</th>\n",
              "      <th>embedding</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>704.0001</td>\n",
              "      <td>Pavel Nadolsky</td>\n",
              "      <td>C. Bal\\'azs, E. L. Berger, P. M. Nadolsky, C.-...</td>\n",
              "      <td>Calculation of prompt diphoton production cros...</td>\n",
              "      <td>37 pages, 15 figures; published version</td>\n",
              "      <td>Phys.Rev.D76:013009,2007</td>\n",
              "      <td>10.1103/PhysRevD.76.013009</td>\n",
              "      <td>ANL-HEP-PR-07-12</td>\n",
              "      <td>hep-ph</td>\n",
              "      <td>None</td>\n",
              "      <td>A fully differential calculation in perturba...</td>\n",
              "      <td>[{'version': 'v1', 'created': 'Mon, 2 Apr 2007...</td>\n",
              "      <td>2008-11-26</td>\n",
              "      <td>[[Balázs, C., ], [Berger, E. L., ], [Nadolsky,...</td>\n",
              "      <td>[0.0594153292, -0.0440569334, -0.0487333685, -...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>704.0002</td>\n",
              "      <td>Louis Theran</td>\n",
              "      <td>Ileana Streinu and Louis Theran</td>\n",
              "      <td>Sparsity-certifying Graph Decompositions</td>\n",
              "      <td>To appear in Graphs and Combinatorics</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "      <td>math.CO cs.CG</td>\n",
              "      <td>http://arxiv.org/licenses/nonexclusive-distrib...</td>\n",
              "      <td>We describe a new algorithm, the $(k,\\ell)$-...</td>\n",
              "      <td>[{'version': 'v1', 'created': 'Sat, 31 Mar 200...</td>\n",
              "      <td>2008-12-13</td>\n",
              "      <td>[[Streinu, Ileana, ], [Theran, Louis, ]]</td>\n",
              "      <td>[0.0247399714, -0.065658465, 0.0201423876, -0....</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>704.0003</td>\n",
              "      <td>Hongjun Pan</td>\n",
              "      <td>Hongjun Pan</td>\n",
              "      <td>The evolution of the Earth-Moon system based o...</td>\n",
              "      <td>23 pages, 3 figures</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "      <td>physics.gen-ph</td>\n",
              "      <td>None</td>\n",
              "      <td>The evolution of Earth-Moon system is descri...</td>\n",
              "      <td>[{'version': 'v1', 'created': 'Sun, 1 Apr 2007...</td>\n",
              "      <td>2008-01-13</td>\n",
              "      <td>[[Pan, Hongjun, ]]</td>\n",
              "      <td>[0.0491479263, 0.0728017688, 0.0604138002, 0.0...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>704.0004</td>\n",
              "      <td>David Callan</td>\n",
              "      <td>David Callan</td>\n",
              "      <td>A determinant of Stirling cycle numbers counts...</td>\n",
              "      <td>11 pages</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "      <td>math.CO</td>\n",
              "      <td>None</td>\n",
              "      <td>We show that a determinant of Stirling cycle...</td>\n",
              "      <td>[{'version': 'v1', 'created': 'Sat, 31 Mar 200...</td>\n",
              "      <td>2007-05-23</td>\n",
              "      <td>[[Callan, David, ]]</td>\n",
              "      <td>[0.0389556214, -0.0410280302, 0.0410280302, -0...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>704.0005</td>\n",
              "      <td>Alberto Torchinsky</td>\n",
              "      <td>Wael Abu-Shammala and Alberto Torchinsky</td>\n",
              "      <td>From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a...</td>\n",
              "      <td>None</td>\n",
              "      <td>Illinois J. Math. 52 (2008) no.2, 681-689</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "      <td>math.CA math.FA</td>\n",
              "      <td>None</td>\n",
              "      <td>In this paper we show how to compute the $\\L...</td>\n",
              "      <td>[{'version': 'v1', 'created': 'Mon, 2 Apr 2007...</td>\n",
              "      <td>2013-10-15</td>\n",
              "      <td>[[Abu-Shammala, Wael, ], [Torchinsky, Alberto, ]]</td>\n",
              "      <td>[0.118412666, -0.0127423415, 0.1185125113, 0.0...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-251c5868-6d7c-4b93-a071-6cbb51375179')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-251c5868-6d7c-4b93-a071-6cbb51375179 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-251c5868-6d7c-4b93-a071-6cbb51375179');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-eb529229-119d-4068-9233-22870b2171e7\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-eb529229-119d-4068-9233-22870b2171e7')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-eb529229-119d-4068-9233-22870b2171e7 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "         id           submitter  \\\n",
              "0  704.0001      Pavel Nadolsky   \n",
              "1  704.0002        Louis Theran   \n",
              "2  704.0003         Hongjun Pan   \n",
              "3  704.0004        David Callan   \n",
              "4  704.0005  Alberto Torchinsky   \n",
              "\n",
              "                                             authors  \\\n",
              "0  C. Bal\\'azs, E. L. Berger, P. M. Nadolsky, C.-...   \n",
              "1                    Ileana Streinu and Louis Theran   \n",
              "2                                        Hongjun Pan   \n",
              "3                                       David Callan   \n",
              "4           Wael Abu-Shammala and Alberto Torchinsky   \n",
              "\n",
              "                                               title  \\\n",
              "0  Calculation of prompt diphoton production cros...   \n",
              "1           Sparsity-certifying Graph Decompositions   \n",
              "2  The evolution of the Earth-Moon system based o...   \n",
              "3  A determinant of Stirling cycle numbers counts...   \n",
              "4  From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a...   \n",
              "\n",
              "                                  comments  \\\n",
              "0  37 pages, 15 figures; published version   \n",
              "1    To appear in Graphs and Combinatorics   \n",
              "2                      23 pages, 3 figures   \n",
              "3                                 11 pages   \n",
              "4                                     None   \n",
              "\n",
              "                                 journal-ref                         doi  \\\n",
              "0                   Phys.Rev.D76:013009,2007  10.1103/PhysRevD.76.013009   \n",
              "1                                       None                        None   \n",
              "2                                       None                        None   \n",
              "3                                       None                        None   \n",
              "4  Illinois J. Math. 52 (2008) no.2, 681-689                        None   \n",
              "\n",
              "          report-no       categories  \\\n",
              "0  ANL-HEP-PR-07-12           hep-ph   \n",
              "1              None    math.CO cs.CG   \n",
              "2              None   physics.gen-ph   \n",
              "3              None          math.CO   \n",
              "4              None  math.CA math.FA   \n",
              "\n",
              "                                             license  \\\n",
              "0                                               None   \n",
              "1  http://arxiv.org/licenses/nonexclusive-distrib...   \n",
              "2                                               None   \n",
              "3                                               None   \n",
              "4                                               None   \n",
              "\n",
              "                                            abstract  \\\n",
              "0    A fully differential calculation in perturba...   \n",
              "1    We describe a new algorithm, the $(k,\\ell)$-...   \n",
              "2    The evolution of Earth-Moon system is descri...   \n",
              "3    We show that a determinant of Stirling cycle...   \n",
              "4    In this paper we show how to compute the $\\L...   \n",
              "\n",
              "                                            versions update_date  \\\n",
              "0  [{'version': 'v1', 'created': 'Mon, 2 Apr 2007...  2008-11-26   \n",
              "1  [{'version': 'v1', 'created': 'Sat, 31 Mar 200...  2008-12-13   \n",
              "2  [{'version': 'v1', 'created': 'Sun, 1 Apr 2007...  2008-01-13   \n",
              "3  [{'version': 'v1', 'created': 'Sat, 31 Mar 200...  2007-05-23   \n",
              "4  [{'version': 'v1', 'created': 'Mon, 2 Apr 2007...  2013-10-15   \n",
              "\n",
              "                                      authors_parsed  \\\n",
              "0  [[Balázs, C., ], [Berger, E. L., ], [Nadolsky,...   \n",
              "1           [[Streinu, Ileana, ], [Theran, Louis, ]]   \n",
              "2                                 [[Pan, Hongjun, ]]   \n",
              "3                                [[Callan, David, ]]   \n",
              "4  [[Abu-Shammala, Wael, ], [Torchinsky, Alberto, ]]   \n",
              "\n",
              "                                           embedding  \n",
              "0  [0.0594153292, -0.0440569334, -0.0487333685, -...  \n",
              "1  [0.0247399714, -0.065658465, 0.0201423876, -0....  \n",
              "2  [0.0491479263, 0.0728017688, 0.0604138002, 0.0...  \n",
              "3  [0.0389556214, -0.0410280302, 0.0410280302, -0...  \n",
              "4  [0.118412666, -0.0127423415, 0.1185125113, 0.0...  "
            ]
          },
          "execution_count": 23,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Load Dataset\n",
        "import pandas as pd\n",
        "from datasets import load_dataset\n",
        "\n",
        "# Make sure you have an Hugging Face token(HF_TOKEN) in your development environemnt before running the code below\n",
        "# How to get a token: https://huggingface.co/docs/hub/en/security-tokens\n",
        "\n",
        "# https://huggingface.co/datasets/MongoDB/embedded_movies\n",
        "dataset = load_dataset(\n",
        "    \"MongoDB/subset_arxiv_papers_with_embeddings\", split=\"train\", streaming=True\n",
        ")\n",
        "dataset = dataset.take(4000)\n",
        "\n",
        "# Convert the dataset to a pandas dataframe\n",
        "dataset_df = pd.DataFrame(dataset)\n",
        "\n",
        "dataset_df.head(5)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "AvMWghrqHgS8"
      },
      "source": [
        "## Data Cleaning"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 24,
      "metadata": {
        "id": "q3afC_2x9lRg"
      },
      "outputs": [],
      "source": [
        "# Remove rows where 'abstract' or 'title' is NA or empty\n",
        "dataset_df = dataset_df.dropna(subset=[\"abstract\", \"title\"])\n",
        "\n",
        "# Remove the embedding from each data point in the dataset as we are going to create new embeddings\n",
        "dataset_df = dataset_df.drop(columns=[\"embedding\"])"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "gHDE7fjKHj8z"
      },
      "source": [
        "## Embedding Generation"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 25,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 0,
          "referenced_widgets": [
            "087d6ccd1a96409eb2c0444c5250134c",
            "2db2b60216f6426a8ef3c5eb3f6c869e",
            "bc45def3ba344b25aa135e89c8d86bf9",
            "21aa0c6f9719489f8af7afeff8e626e3",
            "76c43e87073046f6968430bdf658e253",
            "04a4c590fd924de8bfc1dcfa82fd5607",
            "c1494426d4d149bea9bbd55d60e80890",
            "7aab02e3cd934f0298d8337bd541dac7",
            "41ad3cdf352d412d89f55475674d9e70",
            "d0083c5a955c4cc8839a3403ad95fe19",
            "5b3aedff89d147479b0124d6419c0ee2"
          ]
        },
        "id": "Fd4k1w9lAphP",
        "outputId": "1d709733-d100-4753-b653-dbfbb8f0b55f"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "087d6ccd1a96409eb2c0444c5250134c",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "Generating embeddings for columns: abstract, title:   0%|          | 0/4000 [00:00<?, ?it/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "import openai\n",
        "from tqdm.notebook import tqdm\n",
        "\n",
        "openai.api_key = os.environ[\"OPENAI_API_KEY\"]\n",
        "\n",
        "EMBEDDING_MODEL = \"text-embedding-3-small\"\n",
        "\n",
        "\n",
        "def get_embedding(text):\n",
        "    \"\"\"Generate an embedding for the given text using OpenAI's API.\"\"\"\n",
        "    if not text or not isinstance(text, str):\n",
        "        return None\n",
        "\n",
        "    try:\n",
        "        embedding = (\n",
        "            openai.embeddings.create(input=text, model=EMBEDDING_MODEL, dimensions=1536)\n",
        "            .data[0]\n",
        "            .embedding\n",
        "        )\n",
        "        return embedding\n",
        "    except Exception as e:\n",
        "        print(f\"Error in get_embedding: {e}\")\n",
        "        return None\n",
        "\n",
        "\n",
        "def combine_columns(row, columns):\n",
        "    \"\"\"Combine the contents of specified columns into a single string.\"\"\"\n",
        "    return \" \".join(str(row[col]) for col in columns if pd.notna(row[col]))\n",
        "\n",
        "\n",
        "def apply_embedding_with_progress(df, columns):\n",
        "    \"\"\"Apply embedding to concatenated text from multiple dataframe columns with a progress bar.\"\"\"\n",
        "    if not all(col in df.columns for col in columns):\n",
        "        missing_cols = [col for col in columns if col not in df.columns]\n",
        "        raise ValueError(f\"Columns {missing_cols} not found in the DataFrame.\")\n",
        "\n",
        "    tqdm.pandas(desc=f\"Generating embeddings for columns: {', '.join(columns)}\")\n",
        "\n",
        "    # Combine specified columns\n",
        "    df[\"combined_text\"] = df.apply(lambda row: combine_columns(row, columns), axis=1)\n",
        "\n",
        "    # Generate embeddings\n",
        "    df[\"embedding\"] = df[\"combined_text\"].progress_apply(get_embedding)\n",
        "\n",
        "    # Remove the temporary 'combined_text' column\n",
        "    df = df.drop(columns=[\"combined_text\"])\n",
        "\n",
        "    return df\n",
        "\n",
        "\n",
        "# Ggenerate embeddings based on 'abstract' and 'title' columns\n",
        "try:\n",
        "    # Ensure 'embedding' column is dropped if it exists\n",
        "    dataset_df = dataset_df.drop(columns=[\"embedding\"], errors=\"ignore\")\n",
        "\n",
        "    # Apply embeddings using multiple columns\n",
        "    columns_to_embed = [\n",
        "        \"abstract\",\n",
        "        \"title\",\n",
        "    ]  # Add or remove columns as needed (text only)\n",
        "    dataset_df = apply_embedding_with_progress(dataset_df, columns_to_embed)\n",
        "except Exception as e:\n",
        "    print(f\"An error occurred: {e}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 30,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 0
        },
        "id": "iLPetfBd8UBE",
        "outputId": "f1b2df68-aabe-42e8-bc9b-f1435ac6d06c"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "summary": "{\n  \"name\": \"dataset_df[columns_to_embed + ['embedding']]\",\n  \"rows\": 5,\n  \"fields\": [\n    {\n      \"column\": \"abstract\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 5,\n        \"samples\": [\n          \"  We describe a new algorithm, the $(k,\\\\ell)$-pebble game with colors, and use\\nit obtain a characterization of the family of $(k,\\\\ell)$-sparse graphs and\\nalgorithmic solutions to a family of problems concerning tree decompositions of\\ngraphs. Special instances of sparse graphs appear in rigidity theory and have\\nreceived increased attention in recent years. In particular, our colored\\npebbles generalize and strengthen the previous results of Lee and Streinu and\\ngive a new proof of the Tutte-Nash-Williams characterization of arboricity. We\\nalso present a new decomposition that certifies sparsity based on the\\n$(k,\\\\ell)$-pebble game with colors. Our work also exposes connections between\\npebble game algorithms and previous sparse graph algorithms by Gabow, Gabow and\\nWestermann and Hendrickson.\\n\",\n          \"  In this paper we show how to compute the $\\\\Lambda_{\\\\alpha}$ norm, $\\\\alpha\\\\ge\\n0$, using the dyadic grid. This result is a consequence of the description of\\nthe Hardy spaces $H^p(R^N)$ in terms of dyadic and special atoms.\\n\",\n          \"  The evolution of Earth-Moon system is described by the dark matter field\\nfluid model proposed in the Meeting of Division of Particle and Field 2004,\\nAmerican Physical Society. The current behavior of the Earth-Moon system agrees\\nwith this model very well and the general pattern of the evolution of the\\nMoon-Earth system described by this model agrees with geological and fossil\\nevidence. The closest distance of the Moon to Earth was about 259000 km at 4.5\\nbillion years ago, which is far beyond the Roche's limit. The result suggests\\nthat the tidal friction may not be the primary cause for the evolution of the\\nEarth-Moon system. The average dark matter field fluid constant derived from\\nEarth-Moon system data is 4.39 x 10^(-22) s^(-1)m^(-1). This model predicts\\nthat the Mars's rotation is also slowing with the angular acceleration rate\\nabout -4.38 x 10^(-22) rad s^(-2).\\n\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"title\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 5,\n        \"samples\": [\n          \"Sparsity-certifying Graph Decompositions\",\n          \"From dyadic $\\\\Lambda_{\\\\alpha}$ to $\\\\Lambda_{\\\\alpha}$\",\n          \"The evolution of the Earth-Moon system based on the dark matter field\\n  fluid model\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"embedding\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
              "type": "dataframe"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-e8f6022f-1720-46d6-92ad-9b1ec9f75ca1\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>abstract</th>\n",
              "      <th>title</th>\n",
              "      <th>embedding</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>A fully differential calculation in perturba...</td>\n",
              "      <td>Calculation of prompt diphoton production cros...</td>\n",
              "      <td>[0.04978983476758003, -0.027831584215164185, -...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>We describe a new algorithm, the $(k,\\ell)$-...</td>\n",
              "      <td>Sparsity-certifying Graph Decompositions</td>\n",
              "      <td>[0.021434221416711807, -0.030077634379267693, ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>The evolution of Earth-Moon system is descri...</td>\n",
              "      <td>The evolution of the Earth-Moon system based o...</td>\n",
              "      <td>[0.023649143055081367, 0.04319588467478752, 0....</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>We show that a determinant of Stirling cycle...</td>\n",
              "      <td>A determinant of Stirling cycle numbers counts...</td>\n",
              "      <td>[0.013857707381248474, -0.016583219170570374, ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>In this paper we show how to compute the $\\L...</td>\n",
              "      <td>From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a...</td>\n",
              "      <td>[0.05201460048556328, 0.00613348139449954, 0.0...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e8f6022f-1720-46d6-92ad-9b1ec9f75ca1')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-e8f6022f-1720-46d6-92ad-9b1ec9f75ca1 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-e8f6022f-1720-46d6-92ad-9b1ec9f75ca1');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-8133cc15-212d-47db-b35b-de4e30e2d3c6\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-8133cc15-212d-47db-b35b-de4e30e2d3c6')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-8133cc15-212d-47db-b35b-de4e30e2d3c6 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "                                            abstract  \\\n",
              "0    A fully differential calculation in perturba...   \n",
              "1    We describe a new algorithm, the $(k,\\ell)$-...   \n",
              "2    The evolution of Earth-Moon system is descri...   \n",
              "3    We show that a determinant of Stirling cycle...   \n",
              "4    In this paper we show how to compute the $\\L...   \n",
              "\n",
              "                                               title  \\\n",
              "0  Calculation of prompt diphoton production cros...   \n",
              "1           Sparsity-certifying Graph Decompositions   \n",
              "2  The evolution of the Earth-Moon system based o...   \n",
              "3  A determinant of Stirling cycle numbers counts...   \n",
              "4  From dyadic $\\Lambda_{\\alpha}$ to $\\Lambda_{\\a...   \n",
              "\n",
              "                                           embedding  \n",
              "0  [0.04978983476758003, -0.027831584215164185, -...  \n",
              "1  [0.021434221416711807, -0.030077634379267693, ...  \n",
              "2  [0.023649143055081367, 0.04319588467478752, 0....  \n",
              "3  [0.013857707381248474, -0.016583219170570374, ...  \n",
              "4  [0.05201460048556328, 0.00613348139449954, 0.0...  "
            ]
          },
          "execution_count": 30,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Display the first few rows of the result\n",
        "dataset_df[columns_to_embed + [\"embedding\"]].head()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "xX94fFjxHsMq"
      },
      "source": [
        "## MongoDB Vector Database and Connection Setup"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "men_m8r4HxaA"
      },
      "source": [
        "\n",
        "MongoDB acts as both an operational and a vector database for the RAG system.\n",
        "MongoDB Atlas specifically provides a database solution that efficiently stores, queries and retrieves vector embeddings.\n",
        "\n",
        "Creating a database and collection within MongoDB is made simple with MongoDB Atlas.\n",
        "\n",
        "1. First, register for a [MongoDB Atlas account](https://www.mongodb.com/cloud/atlas/register). For existing users, sign into MongoDB Atlas.\n",
        "2. [Follow the instructions](https://www.mongodb.com/docs/atlas/tutorial/deploy-free-tier-cluster/). Select Atlas UI as the procedure to deploy your first cluster.\n",
        "3. Create the database: `knowledge`.\n",
        "4. Within the database ` research_papers`, create the collection ‘listings_reviews’.\n",
        "5. Create a [vector search index](https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/#procedure/) named vector_index for the ‘listings_reviews’ collection. This index enables the RAG application to retrieve records as additional context to supplement user queries via vector search. Below is the JSON definition of the data collection vector search index.\n",
        "\n",
        "Your vector search index created on MongoDB Atlas should look like below:\n",
        "\n",
        "```\n",
        "{\n",
        "  \"fields\": [\n",
        "    {\n",
        "      \"numDimensions\": 1536,\n",
        "      \"path\": \"embedding\",\n",
        "      \"similarity\": \"cosine\",\n",
        "      \"type\": \"vector\"\n",
        "    }\n",
        "  ]\n",
        "}\n",
        "\n",
        "```\n",
        "\n",
        "Follow MongoDB’s [steps to get the connection](https://www.mongodb.com/docs/manual/reference/connection-string/) string from the Atlas UI. After setting up the database and obtaining the Atlas cluster connection URI, securely store the URI within your development environment.\n",
        "\n",
        "This guide uses Google Colab, which offers a feature for securely storing environment secrets. These secrets can then be accessed within the development environment. Specifically, the line mongo_uri = userdata.get('MONGO_URI') retrieves the URI from the secure storage."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "jbEm1UrwDH-_"
      },
      "outputs": [],
      "source": [
        "os.environ[\"MONGO_URI\"] = \"\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 31,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "rt2FkK-nA61z",
        "outputId": "ab148d4e-871d-4c93-ecd8-d8ae8b963cd1"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Connection to MongoDB successful\n"
          ]
        }
      ],
      "source": [
        "import pymongo\n",
        "from google.colab import userdata\n",
        "\n",
        "\n",
        "def get_mongo_client(mongo_uri):\n",
        "    \"\"\"Establish and validate connection to the MongoDB.\"\"\"\n",
        "\n",
        "    client = pymongo.MongoClient(mongo_uri, appname=\"devrel.showcase.gemma2.python\")\n",
        "\n",
        "    # Validate the connection\n",
        "    ping_result = client.admin.command(\"ping\")\n",
        "    if ping_result.get(\"ok\") == 1.0:\n",
        "        # Connection successful\n",
        "        print(\"Connection to MongoDB successful\")\n",
        "        return client\n",
        "    print(\"Connection to MongoDB failed\")\n",
        "    return None\n",
        "\n",
        "\n",
        "mongo_uri = os.environ[\"MONGO_URI\"]\n",
        "\n",
        "if not mongo_uri:\n",
        "    print(\"MONGO_URI not set in environment variables\")\n",
        "\n",
        "mongo_client = get_mongo_client(mongo_uri)\n",
        "\n",
        "DB_NAME = \"knowledge\"\n",
        "COLLECTION_NAME = \"research_papers\"\n",
        "\n",
        "db = mongo_client.get_database(DB_NAME)\n",
        "collection = db.get_collection(COLLECTION_NAME)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 32,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "K1S3sz4qA8nH",
        "outputId": "56bff222-81fc-4d4c-97bd-9f2dc27c5fa8"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "DeleteResult({'n': 0, 'electionId': ObjectId('7fffffff000000000000002a'), 'opTime': {'ts': Timestamp(1719597926, 1), 't': 42}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1719597926, 1), 'signature': {'hash': b'\\xb3\\xc2\\xbaK\\x7f\\x82\\xe0m`\\xea\\xfa\\x94H\\x15/\\xc7M!*i', 'keyId': 7320226449804230662}}, 'operationTime': Timestamp(1719597926, 1)}, acknowledged=True)"
            ]
          },
          "execution_count": 32,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Delete any existing records in the collection\n",
        "collection.delete_many({})"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "1f-rhtD1IFPc"
      },
      "source": [
        "## Data Ingestion"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 33,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xM0b2J93A9tT",
        "outputId": "292a60cb-5d22-4a59-d035-bf8ceb0b4947"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Data ingestion into MongoDB completed\n"
          ]
        }
      ],
      "source": [
        "# Ingest data into MongoDB\n",
        "try:\n",
        "    collection.insert_many(dataset_df.to_dict(\"records\"))\n",
        "    print(\"Data ingestion into MongoDB completed\")\n",
        "except Exception as e:\n",
        "    print(f\"An error occurred during data ingestion: {e}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5CJmCCdtIHwO"
      },
      "source": [
        "## Vector Search Operation"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 34,
      "metadata": {
        "id": "6ThIpEGABBIL"
      },
      "outputs": [],
      "source": [
        "def vector_search(user_query, collection):\n",
        "    \"\"\"\n",
        "    Perform a vector search in the MongoDB collection based on the user query.\n",
        "\n",
        "    Args:\n",
        "    user_query (str): The user's query string.\n",
        "    collection (MongoCollection): The MongoDB collection to search.\n",
        "\n",
        "    Returns:\n",
        "    list: A list of matching documents.\n",
        "    \"\"\"\n",
        "\n",
        "    # Generate embedding for the user query\n",
        "    query_embedding = get_embedding(user_query)\n",
        "\n",
        "    if query_embedding is None:\n",
        "        return \"Invalid query or embedding generation failed.\"\n",
        "\n",
        "    # Define the vector search pipeline\n",
        "    vector_search_stage = {\n",
        "        \"$vectorSearch\": {\n",
        "            \"index\": \"vector_index\",\n",
        "            \"queryVector\": query_embedding,\n",
        "            \"path\": \"embedding\",\n",
        "            \"numCandidates\": 150,  # Number of candidate matches to consider\n",
        "            \"limit\": 4,  # Return top 4 matches\n",
        "        }\n",
        "    }\n",
        "\n",
        "    project_stage = {\n",
        "        \"$project\": {\n",
        "            \"_id\": 0,  # Exclude the _id field\n",
        "            \"fullplot\": 1,  # Include the plot field\n",
        "            \"title\": 1,  # Include the title field\n",
        "            \"genres\": 1,  # Include the genres field\n",
        "            \"score\": {\n",
        "                \"$meta\": \"vectorSearchScore\"  # Include the search score\n",
        "            },\n",
        "        }\n",
        "    }\n",
        "\n",
        "    pipeline = [vector_search_stage, project_stage]\n",
        "\n",
        "    # Execute the search\n",
        "    results = collection.aggregate(pipeline)\n",
        "    return list(results)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "eKs8LQt1IL2s"
      },
      "source": [
        "## Handle User Results"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 35,
      "metadata": {
        "id": "dweK1crhBCg1"
      },
      "outputs": [],
      "source": [
        "def get_search_result(query, collection):\n",
        "    get_knowledge = vector_search(query, collection)\n",
        "\n",
        "    search_result = \"\"\n",
        "    for result in get_knowledge:\n",
        "        search_result += f\"Title: {result.get('title', 'N/A')}, Plot: {result.get('fullplot', 'N/A')}\\n\"\n",
        "\n",
        "    return search_result"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 49,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "aN3RG32hBDkB",
        "outputId": "bb50083b-9ff5-4bb3-e887-b9a13a04fc92"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Query: Give me a recommended paper on machine learning\n",
            "Continue to answer the query by using the Search Results:\n",
            "Title: Using Access Data for Paper Recommendations on ArXiv.org, Plot: N/A\n",
            "Title: Missing Data: A Comparison of Neural Network and Expectation\n",
            "  Maximisation Techniques, Plot: N/A\n",
            "Title: An Adaptive Strategy for the Classification of G-Protein Coupled\n",
            "  Receptors, Plot: N/A\n",
            "Title: A multivariate approach to heavy flavour tagging with cascade training, Plot: N/A\n",
            ".\n"
          ]
        }
      ],
      "source": [
        "# Conduct query with retrival of sources\n",
        "query = \"Give me a recommended paper on machine learning\"\n",
        "source_information = get_search_result(query, collection)\n",
        "combined_information = f\"Query: {query}\\nContinue to answer the query by using the Search Results:\\n{source_information}.\"\n",
        "\n",
        "print(combined_information)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ODA49ScKIPUW"
      },
      "source": [
        "## Keras Config and Markdown"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 39,
      "metadata": {
        "id": "Nxj9QM2AU8oU"
      },
      "outputs": [],
      "source": [
        "import textwrap\n",
        "\n",
        "import keras\n",
        "import keras_nlp\n",
        "from IPython.display import Markdown\n",
        "\n",
        "# Run at half precision.\n",
        "keras.config.set_floatx(\"bfloat16\")\n",
        "\n",
        "\n",
        "def to_markdown(text):\n",
        "    text = text.replace(\"•\", \"  *\")\n",
        "    return Markdown(textwrap.indent(text, \"> \", predicate=lambda _: True))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "IL4E2rqSITcY"
      },
      "source": [
        "## Handle Response Generation and History"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 40,
      "metadata": {
        "id": "pVgkHvRyMTnD"
      },
      "outputs": [],
      "source": [
        "from typing import Dict, Optional\n",
        "\n",
        "\n",
        "class GemmaChat:\n",
        "    __START_TURN__ = \"<start_of_turn>\"\n",
        "    __END_TURN__ = \"<end_of_turn>\"\n",
        "    __SYSTEM_STOP__ = \"<eos>\"\n",
        "\n",
        "    def __init__(\n",
        "        self, model, system: str = \"\", history: Optional[Dict[str, str]] = None\n",
        "    ):\n",
        "        self.model = model\n",
        "        self.system = system\n",
        "        self.history_params = history or {}\n",
        "        self.client = pymongo.MongoClient(\n",
        "            self.history_params.get(\"connection_string\", \"mongodb://localhost:27017/\")\n",
        "        )\n",
        "        self.db = self.client[self.history_params.get(\"database\", \"gemma_chat\")]\n",
        "        self.collection = self.db[self.history_params.get(\"collection\", \"chat_history\")]\n",
        "        self.session_id = self.history_params.get(\"session_id\", \"default_session\")\n",
        "\n",
        "    def format_message(self, message: str, prefix: str = \"\") -> str:\n",
        "        return f\"{self.__START_TURN__}{prefix}\\n{message}{self.__END_TURN__}\\n\"\n",
        "\n",
        "    def add_to_history(self, message: str, prefix: str = \"\"):\n",
        "        formatted_message = self.format_message(message, prefix)\n",
        "        self.collection.insert_one(\n",
        "            {\"session_id\": self.session_id, \"message\": formatted_message}\n",
        "        )\n",
        "\n",
        "    def get_full_prompt(self) -> str:\n",
        "        history = self.collection.find({\"session_id\": self.session_id}).sort(\"_id\", 1)\n",
        "        prompt = self.system + \"\\n\" + \"\\n\".join([item[\"message\"] for item in history])\n",
        "        return prompt\n",
        "\n",
        "    def send_message(self, message: str) -> str:\n",
        "        self.add_to_history(message, \"user\")\n",
        "        prompt = self.get_full_prompt()\n",
        "        response = self.model.generate(prompt, max_length=2048)\n",
        "        result = response.replace(prompt, \"\").replace(self.__SYSTEM_STOP__, \"\")\n",
        "        self.add_to_history(result, \"model\")\n",
        "        return result\n",
        "\n",
        "    def show_history(self):\n",
        "        history = self.collection.find({\"session_id\": self.session_id}).sort(\"_id\", 1)\n",
        "        for item in history:\n",
        "            print(item[\"message\"])"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "gTrkcZFvIYs4"
      },
      "source": [
        "## Gemma2 Model Initalisation"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 41,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 0,
          "referenced_widgets": [
            "17020b70954a47798e83839a008b8228",
            "c62c5fb067c444798d16f9d25ba4baaa",
            "604a6135413d4f918d74c5d3c2d9e1f7",
            "ceb781eabbc042ada6832329ef0af03c",
            "30b0c4debb2c4866ac96ae53dd744173",
            "128898b0cfa14f6096c5234ac3206569",
            "c72282f9678c4950b9e591d54ae44b0b",
            "3c3e6a10a2cb4760b947207dc8cead0b",
            "fdb2ec1e597443a9ac2d008f8b5c2c34",
            "73733891708d4797869041e13f3b10fd",
            "2ecd1d594c14490bbc9a49372416c602",
            "e88f64ab7d9f4ddebd92ff9faee68158",
            "ef5af207a278442082fec773f83e6d80",
            "ef091283ab4c4092b07d280312c02749",
            "74d367d04bc54464b6a530554854a1d4",
            "344505fb387b49ecb9c6cc73a9a11cf9",
            "1c36a77cc04b45c990c656d7d53aee79",
            "5d59bbf9c7ca488997a359198c09bdec",
            "d2ce2dc93780484bad997f7e50f28c38",
            "87874f599b6f4bd48030ff04a87d6a17",
            "9726f31f6df74ce08f074f09e3c0f79c",
            "21982ccc777a4277b4487ed91aaca1ec",
            "a2da32caea9a4b24865fa31b5a1eea51",
            "c0764877ea0442229e5b255b71e47e99",
            "13a6ada0115d425b910b806da996488e",
            "9dbadc68dd404d0697afc07d180cc786",
            "2e9df743ea2a4f7bbe48e247e4e3e6b7",
            "57a5f43bc22e49ea8aa835119ef1339c",
            "925136d6932041aa8f8a0cf18cd96eef",
            "25e91755f5fe4ea2af182f641fdfa749",
            "416c7008b7e34bc5969fac8612f06504",
            "bbfe1404667a4fa1a00f18d4d84757b2",
            "03fea3ec95084c54b2079e8dbfdbd549",
            "ec8faadd88194d3b8acc53f4928652a6",
            "ec0085db3d224e9aa29013058470ffc1",
            "af09307201564a34811a0a1017b6c780",
            "e933c22e47c64b5087afd4cbb4d32df8",
            "99e9f32ceccd40f285ac85ffe3d0ab17",
            "ab7e23892e7647fd8380f758c8ce0c50",
            "facfcdf4bf854cfdaf64aec483649999",
            "41112befec9943638ae5bd620a377e39",
            "881b69e8fce54d239cc4db25c2cac32f",
            "edfc18387be54809b823d68f469c7eb6",
            "49eb2d590df44762949c1152785b8384"
          ]
        },
        "id": "ClDpsGfzMmIS",
        "outputId": "dc05316a-c5a9-47f1-9263-076f07646eb0"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "17020b70954a47798e83839a008b8228",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "metadata.json:   0%|          | 0.00/143 [00:00<?, ?B/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "e88f64ab7d9f4ddebd92ff9faee68158",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "task.json:   0%|          | 0.00/2.25k [00:00<?, ?B/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "a2da32caea9a4b24865fa31b5a1eea51",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "model.weights.h5:   0%|          | 0.00/18.5G [00:00<?, ?B/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "ec8faadd88194d3b8acc53f4928652a6",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "vocabulary.spm:   0%|          | 0.00/4.24M [00:00<?, ?B/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Preprocessor: \"gemma_causal_lm_preprocessor\"</span>\n",
              "</pre>\n"
            ],
            "text/plain": [
              "\u001b[1mPreprocessor: \"gemma_causal_lm_preprocessor\"\u001b[0m\n"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
              "┃<span style=\"font-weight: bold\"> Tokenizer (type)                                   </span>┃<span style=\"font-weight: bold\">                                             Vocab # </span>┃\n",
              "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
              "│ gemma_tokenizer (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GemmaTokenizer</span>)                   │                                             <span style=\"color: #00af00; text-decoration-color: #00af00\">256,000</span> │\n",
              "└────────────────────────────────────────────────────┴─────────────────────────────────────────────────────┘\n",
              "</pre>\n"
            ],
            "text/plain": [
              "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
              "┃\u001b[1m \u001b[0m\u001b[1mTokenizer (type)                                  \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m                                            Vocab #\u001b[0m\u001b[1m \u001b[0m┃\n",
              "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
              "│ gemma_tokenizer (\u001b[38;5;33mGemmaTokenizer\u001b[0m)                   │                                             \u001b[38;5;34m256,000\u001b[0m │\n",
              "└────────────────────────────────────────────────────┴─────────────────────────────────────────────────────┘\n"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"gemma_causal_lm\"</span>\n",
              "</pre>\n"
            ],
            "text/plain": [
              "\u001b[1mModel: \"gemma_causal_lm\"\u001b[0m\n"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
              "┃<span style=\"font-weight: bold\"> Layer (type)                  </span>┃<span style=\"font-weight: bold\"> Output Shape              </span>┃<span style=\"font-weight: bold\">         Param # </span>┃<span style=\"font-weight: bold\"> Connected to               </span>┃\n",
              "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
              "│ padding_mask (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>)     │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>)              │               <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ -                          │\n",
              "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n",
              "│ token_ids (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">InputLayer</span>)        │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>)              │               <span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> │ -                          │\n",
              "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n",
              "│ gemma_backbone                │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">3584</span>)        │   <span style=\"color: #00af00; text-decoration-color: #00af00\">9,241,705,984</span> │ padding_mask[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>],        │\n",
              "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">GemmaBackbone</span>)               │                           │                 │ token_ids[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]            │\n",
              "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n",
              "│ token_embedding               │ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">256000</span>)      │     <span style=\"color: #00af00; text-decoration-color: #00af00\">917,504,000</span> │ gemma_backbone[<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>][<span style=\"color: #00af00; text-decoration-color: #00af00\">0</span>]       │\n",
              "│ (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">ReversibleEmbedding</span>)         │                           │                 │                            │\n",
              "└───────────────────────────────┴───────────────────────────┴─────────────────┴────────────────────────────┘\n",
              "</pre>\n"
            ],
            "text/plain": [
              "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓\n",
              "┃\u001b[1m \u001b[0m\u001b[1mLayer (type)                 \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape             \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m        Param #\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mConnected to              \u001b[0m\u001b[1m \u001b[0m┃\n",
              "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩\n",
              "│ padding_mask (\u001b[38;5;33mInputLayer\u001b[0m)     │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m)              │               \u001b[38;5;34m0\u001b[0m │ -                          │\n",
              "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n",
              "│ token_ids (\u001b[38;5;33mInputLayer\u001b[0m)        │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m)              │               \u001b[38;5;34m0\u001b[0m │ -                          │\n",
              "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n",
              "│ gemma_backbone                │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m3584\u001b[0m)        │   \u001b[38;5;34m9,241,705,984\u001b[0m │ padding_mask[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m],        │\n",
              "│ (\u001b[38;5;33mGemmaBackbone\u001b[0m)               │                           │                 │ token_ids[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m]            │\n",
              "├───────────────────────────────┼───────────────────────────┼─────────────────┼────────────────────────────┤\n",
              "│ token_embedding               │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256000\u001b[0m)      │     \u001b[38;5;34m917,504,000\u001b[0m │ gemma_backbone[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m]       │\n",
              "│ (\u001b[38;5;33mReversibleEmbedding\u001b[0m)         │                           │                 │                            │\n",
              "└───────────────────────────────┴───────────────────────────┴─────────────────┴────────────────────────────┘\n"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">9,241,705,984</span> (17.21 GB)\n",
              "</pre>\n"
            ],
            "text/plain": [
              "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m9,241,705,984\u001b[0m (17.21 GB)\n"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">9,241,705,984</span> (17.21 GB)\n",
              "</pre>\n"
            ],
            "text/plain": [
              "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m9,241,705,984\u001b[0m (17.21 GB)\n"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "data": {
            "text/html": [
              "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n",
              "</pre>\n"
            ],
            "text/plain": [
              "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset(\n",
        "    \"hf://gg-tt/gemma-2-instruct-9b-keras\"\n",
        ")\n",
        "gemma_lm.summary()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 42,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 0
        },
        "id": "wQoahzsN-V7J",
        "outputId": "d5bc726b-a317-40e1-a5ab-de345a0f6688"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "CPU times: user 56.1 s, sys: 741 ms, total: 56.8 s\n",
            "Wall time: 36.7 s\n"
          ]
        },
        {
          "data": {
            "text/markdown": [
              "> What are your current capabilities?\n",
              "> \n",
              "> As a large language model, I am trained on a massive dataset of text and code. This allows me to perform a variety of tasks, including:\n",
              "> \n",
              "> * **Generating text:** I can write stories, articles, poems, and other types of creative content.\n",
              "> * **Translating languages:** I can translate text from one language to another.\n",
              "> * **Summarizing text:** I can provide concise summaries of long pieces of text.\n",
              "> * **Answering questions:** I can answer questions based on the information I have been trained on.\n",
              "> * **Coding:** I can generate and understand code in multiple programming languages.\n",
              "> \n",
              "> **However, it is important to note that I am still under development and my abilities are constantly evolving.** I am not able to access real-time information or interact with the physical world. I also do not have personal opinions or beliefs.\n",
              "> \n",
              "> My purpose is to assist users with their language-based tasks and provide helpful information.<end_of_turn>\n"
            ],
            "text/plain": [
              "<IPython.core.display.Markdown object>"
            ]
          },
          "execution_count": 42,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Testing Gemma\n",
        "%time result = gemma_lm.generate(\"What are your current capabilities?\", max_length=256)\n",
        "to_markdown(result)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "aQV8jMFeIesW"
      },
      "source": [
        "## Query Gemma 2 with Retrieved Data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 50,
      "metadata": {
        "id": "zkpEIMoCMTtG"
      },
      "outputs": [],
      "source": [
        "history_params = {\n",
        "    \"connection_string\": userdata.get(\"MONGO_URI\"),\n",
        "    \"database\": DB_NAME,\n",
        "    \"collection\": \"chat_history\",\n",
        "    \"session_id\": \"unique_session_id\",\n",
        "}\n",
        "\n",
        "gemma_chat = GemmaChat(\n",
        "    gemma_lm, system=\"You are a research assistant\", history=history_params\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 51,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 167
        },
        "id": "Ag3-0Mn6Oa6C",
        "outputId": "77d697e3-d134-4eda-8bad-98528eb3adfc"
      },
      "outputs": [
        {
          "data": {
            "text/markdown": [
              "> \n",
              "> \n",
              "> Based on your search results, I'd recommend **\"Missing Data: A Comparison of Neural Network and Expectation Maximisation Techniques\"**. \n",
              "> \n",
              "> Here's why:\n",
              "> \n",
              "> * **Relevance to Machine Learning:** This paper directly addresses a common challenge in machine learning: handling missing data. \n",
              "> * **Comparison of Methods:** It compares two popular approaches for dealing with missing data – neural networks and Expectation Maximisation – which is valuable for understanding the strengths and weaknesses of different techniques.\n",
              "> \n",
              "> \n",
              "> Let me know if you'd like to explore other papers based on specific aspects of machine learning! \n",
              "> <end_of_turn>\n"
            ],
            "text/plain": [
              "<IPython.core.display.Markdown object>"
            ]
          },
          "execution_count": 51,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "result = gemma_chat.send_message(combined_information)\n",
        "to_markdown(result)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "L6iZKmY0Ijsw"
      },
      "source": [
        "## View Chat History"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 52,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "OUEpnQ9KVH55",
        "outputId": "36c80fd7-8875-4275-9666-5e2c01192753"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "<start_of_turn>user\n",
            "Query: What is the best romantic movie to watch and why?\n",
            "Continue to answer the query by using the Search Results:\n",
            "Title: Non-Associativity of Lorentz Transformation and Associative Reflection\n",
            "  Symmetric Transformation, Plot: N/A\n",
            "Title: Erwin Schroedinger, Francis Crick and epigenetic stability, Plot: N/A\n",
            "Title: Time and motion in physics: the Reciprocity Principle, relativistic\n",
            "  invariance of the lengths of rulers and time dilatation, Plot: N/A\n",
            "Title: XMM-Newton X-ray Observations of the Wolf-Rayet Binary System WR 147, Plot: N/A\n",
            ".<end_of_turn>\n",
            "\n",
            "<start_of_turn>model\n",
            "\n",
            "\n",
            "It seems like you've provided me with some scientific research papers rather than movie titles!  \n",
            "\n",
            "To recommend a great romantic movie, I need information about what kind of romance you're looking for. \n",
            "\n",
            "For example, do you prefer:\n",
            "\n",
            "* **Classic romantic comedies?**\n",
            "* **Heartbreaking dramas?**\n",
            "* **Something lighthearted and fun?**\n",
            "* **A movie with a historical setting?**\n",
            "\n",
            "\n",
            "Tell me more about your taste, and I'll give you a personalized recommendation! \n",
            "<end_of_turn>\n",
            "<end_of_turn>\n",
            "\n",
            "<start_of_turn>user\n",
            "Query: What is the best romantic movie to watch and why?\n",
            "Continue to answer the query by using the Search Results:\n",
            "Title: Non-Associativity of Lorentz Transformation and Associative Reflection\n",
            "  Symmetric Transformation, Plot: N/A\n",
            "Title: Erwin Schroedinger, Francis Crick and epigenetic stability, Plot: N/A\n",
            "Title: Time and motion in physics: the Reciprocity Principle, relativistic\n",
            "  invariance of the lengths of rulers and time dilatation, Plot: N/A\n",
            "Title: XMM-Newton X-ray Observations of the Wolf-Rayet Binary System WR 147, Plot: N/A\n",
            ".<end_of_turn>\n",
            "\n",
            "<start_of_turn>model\n",
            "\n",
            "\n",
            "You've given me a list of scientific papers again!  Those are fascinating, but they won't help me pick a romantic movie.  \n",
            "\n",
            "To give you a good recommendation, I need to know what kind of romantic movie you'd enjoy.  \n",
            "\n",
            "Do you like:\n",
            "\n",
            "* **Funny romances?**\n",
            "* **Tearjerkers?**\n",
            "* **Movies set in a particular time period?**\n",
            "* **Something with a unique plot?**\n",
            "\n",
            "\n",
            "\n",
            "Tell me more about what you're looking for! \n",
            "<end_of_turn>\n",
            "<end_of_turn>\n",
            "\n",
            "<start_of_turn>user\n",
            "Query: Give me a recommended paper on machine learning\n",
            "Continue to answer the query by using the Search Results:\n",
            "Title: Using Access Data for Paper Recommendations on ArXiv.org, Plot: N/A\n",
            "Title: Missing Data: A Comparison of Neural Network and Expectation\n",
            "  Maximisation Techniques, Plot: N/A\n",
            "Title: An Adaptive Strategy for the Classification of G-Protein Coupled\n",
            "  Receptors, Plot: N/A\n",
            "Title: A multivariate approach to heavy flavour tagging with cascade training, Plot: N/A\n",
            ".<end_of_turn>\n",
            "\n",
            "<start_of_turn>model\n",
            "\n",
            "\n",
            "Based on your search results, I'd recommend **\"Missing Data: A Comparison of Neural Network and Expectation Maximisation Techniques\"**. \n",
            "\n",
            "Here's why:\n",
            "\n",
            "* **Relevance to Machine Learning:** This paper directly addresses a common challenge in machine learning: handling missing data. \n",
            "* **Comparison of Methods:** It compares two popular approaches for dealing with missing data – neural networks and Expectation Maximisation – which is valuable for understanding the strengths and weaknesses of different techniques.\n",
            "\n",
            "\n",
            "Let me know if you'd like to explore other papers based on specific aspects of machine learning! \n",
            "<end_of_turn>\n",
            "<end_of_turn>\n",
            "\n"
          ]
        }
      ],
      "source": [
        "gemma_chat.show_history()"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "A100",
      "machine_shape": "hm",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "03fea3ec95084c54b2079e8dbfdbd549": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "04a4c590fd924de8bfc1dcfa82fd5607": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "087d6ccd1a96409eb2c0444c5250134c": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_2db2b60216f6426a8ef3c5eb3f6c869e",
              "IPY_MODEL_bc45def3ba344b25aa135e89c8d86bf9",
              "IPY_MODEL_21aa0c6f9719489f8af7afeff8e626e3"
            ],
            "layout": "IPY_MODEL_76c43e87073046f6968430bdf658e253"
          }
        },
        "128898b0cfa14f6096c5234ac3206569": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "13a6ada0115d425b910b806da996488e": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_25e91755f5fe4ea2af182f641fdfa749",
            "max": 18484904872,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_416c7008b7e34bc5969fac8612f06504",
            "value": 18484904872
          }
        },
        "17020b70954a47798e83839a008b8228": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_c62c5fb067c444798d16f9d25ba4baaa",
              "IPY_MODEL_604a6135413d4f918d74c5d3c2d9e1f7",
              "IPY_MODEL_ceb781eabbc042ada6832329ef0af03c"
            ],
            "layout": "IPY_MODEL_30b0c4debb2c4866ac96ae53dd744173"
          }
        },
        "1c36a77cc04b45c990c656d7d53aee79": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "21982ccc777a4277b4487ed91aaca1ec": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "21aa0c6f9719489f8af7afeff8e626e3": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d0083c5a955c4cc8839a3403ad95fe19",
            "placeholder": "​",
            "style": "IPY_MODEL_5b3aedff89d147479b0124d6419c0ee2",
            "value": " 4000/4000 [14:22&lt;00:00,  5.25it/s]"
          }
        },
        "25e91755f5fe4ea2af182f641fdfa749": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2db2b60216f6426a8ef3c5eb3f6c869e": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_04a4c590fd924de8bfc1dcfa82fd5607",
            "placeholder": "​",
            "style": "IPY_MODEL_c1494426d4d149bea9bbd55d60e80890",
            "value": "Generating embeddings for columns: abstract, title: 100%"
          }
        },
        "2e9df743ea2a4f7bbe48e247e4e3e6b7": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2ecd1d594c14490bbc9a49372416c602": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "30b0c4debb2c4866ac96ae53dd744173": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "344505fb387b49ecb9c6cc73a9a11cf9": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3c3e6a10a2cb4760b947207dc8cead0b": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "41112befec9943638ae5bd620a377e39": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "416c7008b7e34bc5969fac8612f06504": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "41ad3cdf352d412d89f55475674d9e70": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "49eb2d590df44762949c1152785b8384": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "57a5f43bc22e49ea8aa835119ef1339c": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "5b3aedff89d147479b0124d6419c0ee2": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "5d59bbf9c7ca488997a359198c09bdec": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "604a6135413d4f918d74c5d3c2d9e1f7": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_3c3e6a10a2cb4760b947207dc8cead0b",
            "max": 143,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_fdb2ec1e597443a9ac2d008f8b5c2c34",
            "value": 143
          }
        },
        "73733891708d4797869041e13f3b10fd": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "74d367d04bc54464b6a530554854a1d4": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_9726f31f6df74ce08f074f09e3c0f79c",
            "placeholder": "​",
            "style": "IPY_MODEL_21982ccc777a4277b4487ed91aaca1ec",
            "value": " 2.25k/2.25k [00:00&lt;00:00, 179kB/s]"
          }
        },
        "76c43e87073046f6968430bdf658e253": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "7aab02e3cd934f0298d8337bd541dac7": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "87874f599b6f4bd48030ff04a87d6a17": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "881b69e8fce54d239cc4db25c2cac32f": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "925136d6932041aa8f8a0cf18cd96eef": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "9726f31f6df74ce08f074f09e3c0f79c": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "99e9f32ceccd40f285ac85ffe3d0ab17": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9dbadc68dd404d0697afc07d180cc786": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_bbfe1404667a4fa1a00f18d4d84757b2",
            "placeholder": "​",
            "style": "IPY_MODEL_03fea3ec95084c54b2079e8dbfdbd549",
            "value": " 18.5G/18.5G [04:00&lt;00:00, 83.8MB/s]"
          }
        },
        "a2da32caea9a4b24865fa31b5a1eea51": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_c0764877ea0442229e5b255b71e47e99",
              "IPY_MODEL_13a6ada0115d425b910b806da996488e",
              "IPY_MODEL_9dbadc68dd404d0697afc07d180cc786"
            ],
            "layout": "IPY_MODEL_2e9df743ea2a4f7bbe48e247e4e3e6b7"
          }
        },
        "ab7e23892e7647fd8380f758c8ce0c50": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "af09307201564a34811a0a1017b6c780": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_41112befec9943638ae5bd620a377e39",
            "max": 4241003,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_881b69e8fce54d239cc4db25c2cac32f",
            "value": 4241003
          }
        },
        "bbfe1404667a4fa1a00f18d4d84757b2": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "bc45def3ba344b25aa135e89c8d86bf9": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7aab02e3cd934f0298d8337bd541dac7",
            "max": 4000,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_41ad3cdf352d412d89f55475674d9e70",
            "value": 4000
          }
        },
        "c0764877ea0442229e5b255b71e47e99": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_57a5f43bc22e49ea8aa835119ef1339c",
            "placeholder": "​",
            "style": "IPY_MODEL_925136d6932041aa8f8a0cf18cd96eef",
            "value": "model.weights.h5: 100%"
          }
        },
        "c1494426d4d149bea9bbd55d60e80890": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "c62c5fb067c444798d16f9d25ba4baaa": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_128898b0cfa14f6096c5234ac3206569",
            "placeholder": "​",
            "style": "IPY_MODEL_c72282f9678c4950b9e591d54ae44b0b",
            "value": "metadata.json: 100%"
          }
        },
        "c72282f9678c4950b9e591d54ae44b0b": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "ceb781eabbc042ada6832329ef0af03c": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_73733891708d4797869041e13f3b10fd",
            "placeholder": "​",
            "style": "IPY_MODEL_2ecd1d594c14490bbc9a49372416c602",
            "value": " 143/143 [00:00&lt;00:00, 12.4kB/s]"
          }
        },
        "d0083c5a955c4cc8839a3403ad95fe19": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d2ce2dc93780484bad997f7e50f28c38": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e88f64ab7d9f4ddebd92ff9faee68158": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_ef5af207a278442082fec773f83e6d80",
              "IPY_MODEL_ef091283ab4c4092b07d280312c02749",
              "IPY_MODEL_74d367d04bc54464b6a530554854a1d4"
            ],
            "layout": "IPY_MODEL_344505fb387b49ecb9c6cc73a9a11cf9"
          }
        },
        "e933c22e47c64b5087afd4cbb4d32df8": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_edfc18387be54809b823d68f469c7eb6",
            "placeholder": "​",
            "style": "IPY_MODEL_49eb2d590df44762949c1152785b8384",
            "value": " 4.24M/4.24M [00:00&lt;00:00, 17.4MB/s]"
          }
        },
        "ec0085db3d224e9aa29013058470ffc1": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ab7e23892e7647fd8380f758c8ce0c50",
            "placeholder": "​",
            "style": "IPY_MODEL_facfcdf4bf854cfdaf64aec483649999",
            "value": "vocabulary.spm: 100%"
          }
        },
        "ec8faadd88194d3b8acc53f4928652a6": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_ec0085db3d224e9aa29013058470ffc1",
              "IPY_MODEL_af09307201564a34811a0a1017b6c780",
              "IPY_MODEL_e933c22e47c64b5087afd4cbb4d32df8"
            ],
            "layout": "IPY_MODEL_99e9f32ceccd40f285ac85ffe3d0ab17"
          }
        },
        "edfc18387be54809b823d68f469c7eb6": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ef091283ab4c4092b07d280312c02749": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d2ce2dc93780484bad997f7e50f28c38",
            "max": 2249,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_87874f599b6f4bd48030ff04a87d6a17",
            "value": 2249
          }
        },
        "ef5af207a278442082fec773f83e6d80": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_1c36a77cc04b45c990c656d7d53aee79",
            "placeholder": "​",
            "style": "IPY_MODEL_5d59bbf9c7ca488997a359198c09bdec",
            "value": "task.json: 100%"
          }
        },
        "facfcdf4bf854cfdaf64aec483649999": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "fdb2ec1e597443a9ac2d008f8b5c2c34": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "state": {}
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
